/*

Preschool Availability and Female Labor Force Participation: Evidence from Indonesia
Daniel Halim, Hillary C. Johnson, Elizaveta Perova

The World Bank
East Asia Pacific Gender Innovation Lab (EAPGIL)

Apr 2020


Objective: clean PODES and SUSENAS data to be used as main explanatory variables

*/
capture log close

***************************************************************************
*** PART 0: District-code crosswalks, each keyed back to the 1993 codes
***************************************************************************

*--------*
*  1993  *
*--------*
* Reference list of 1993 district codes: one row per kabcode_des93.
use "$raw/District codes/kab_codes_93_09", clear
keep kab*93
duplicates drop
* log any remaining repeats of the key, then force one row per code
duplicates report kabcode_des93
duplicates drop kabcode_des93, force
save "$clean/PODES/kab93", replace

*--------*
*  1996  *
*--------*
* Crosswalk: 1996 district code (kabcode_des96) -> 1993 code columns.
use "$raw/District codes/kab_codes_93_09", clear
keep kab*93 kab*96
duplicates drop
* log repeats on the (1996, 1993) pair, then keep a single row per 1996 code
duplicates report kabcode_des96 kabcode_des93
duplicates drop kabcode_des96, force
save "$clean/PODES/kab9396", replace

*--------*
*  2000  *
*--------*
* Crosswalk: PODES 2000 district id (id_podes00) -> 1993 code (bps_1993).
use "$raw/District codes/crosswalk_frame511_june14", clear
keep id_podes00 bps_1993
duplicates drop
* rows without a PODES 2000 id cannot be used as merge keys
keep if id_podes00 != .
sort id_podes00
* same manual recode as in the 2005 and 2014 crosswalks built from this file
recode bps_1993 (1212=1210) (1309=1308)
save "$clean/PODES/kab9300", replace


*--------*
*  2003  *
*--------*
* Crosswalk for PODES 2003, built on the *02 code columns (kabcode_des02).
use "$raw/District codes/kab_codes_93_09", clear
keep kab*93 kab*02
duplicates drop
* log repeats of the key, then force one row per kabcode_des02
duplicates report kabcode_des02
duplicates drop kabcode_des02, force
save "$clean/PODES/kab9302", replace


*--------*
*  2005  *
*--------*
* (a) PODES 2005 district id (id_podes05) -> 1993 code (bps_1993)
use "$raw/District codes/crosswalk_frame511_june14", clear
keep id_podes05 bps_1993
duplicates drop
keep if id_podes05 != .
sort id_podes05
* diagnostic only: the key should be unique for the later m:1 merge
duplicates report id_podes05
sort bps_1993
recode bps_1993 (1212=1210) (1309=1308)
save "$clean/PODES/kab9305", replace

* (b) SUSENAS 2005 district id (id_sus05) -> 1993 code (bps_1993)
use "$raw/District codes/crosswalk_frame511_june14.dta", clear
keep bps_1993 id_sus05
duplicates drop
keep if id_sus05 != .
duplicates report id_sus05
recode bps_1993 (1212=1210) (1309=1308)
save "$clean/SUSENAS/kab9305", replace

*--------*
*  2008  *
*--------*
* Crosswalk: 2008 district code (kabcode_des08) -> 1993 code columns.
use "$raw/District codes/kab_codes_93_09", clear
keep kab*93 kab*08
duplicates drop
* diagnostics on both 2008 code columns; only kabcode_des08 is made unique
duplicates report kabcode_des08
duplicates report kabcode_jul08
duplicates drop kabcode_des08, force
save "$clean/PODES/kab9308", replace

*--------*
*  2011  *
*--------*
* Crosswalk used for the 2011 wave, built on the *09 code columns (kabcode_agt09).
use "$raw/District codes/kab_codes_93_09", clear
keep kab*93 kab*09
duplicates drop
* log repeats of the key, then force one row per kabcode_agt09
duplicates report kabcode_agt09
duplicates drop kabcode_agt09, force
save "$clean/PODES/kab9309", replace

*--------*
*  2014  *
*--------*
* Crosswalk: 2014 district code (bps_2014) -> 1993 code (bps_1993).
use "$raw/District codes/crosswalk_frame511_june14", clear
keep bps_1993 bps_2014 province14
* manual recode to be consistent with other datasets
recode bps_1993 (1212=1210) (1309=1308)
* n_extra = number of additional rows sharing the same bps_2014
duplicates tag bps_2014, generate(n_extra)
* where a 2014 code appears exactly twice, discard the Kalimantan Utara row
drop if n_extra==1 & province14=="Prov. Kalimantan Utara"
drop n_extra province14
save "$clean/PODES/kab9314", replace


***************************************************************************
*** PART 1: Aggregate number of preschools at kabupaten level
***************************************************************************

*--------*
*  1990  *
*--------*

***** Lookup of subdistricts that sit in districts newly split off between
***** 1990 and 1993 (taken from the PODES 1993 geography).
use "$raw/PODES/1993/PODES93_1", clear
keep b1r1 b1r2 b1r4
duplicates drop
destring b1r1, replace
* force: non-numeric b1r2 values become missing
destring b1r2, replace force
generate kab93 = b1r1*100 + b1r2
* keep only the split districts
keep if inlist(kab93, 1804, 3275, 5171, 5271, 7173, 8271)
destring b1r4, replace
* kab90 = the 1990 district code the subdistrict is mapped to
* (the 1472 rule cannot fire here because 1472 is not in the keep list above)
generate kab90 = kab93
recode kab90 (1472=1403) (1804=1803) (3275=3219) (5171=5103) (5271=5201) (7173=7103) (8271=8203)
drop b1r1 b1r2
save "$clean/PODES/kab9390", replace

***** PODES 1990: kindergarten counts per 1993 district
use "$raw/PODES/1990/PODES906", clear

*** harmonize districts
rename (b1r1 b1r2) (prov kab)
generate kab90 = prov*100 + kab
* flag subdistricts that belong to a district split off by 1993
merge m:1 kab90 b1r4 using "$clean/PODES/kab9390"
* matched subdistricts take the 1993 code; everything else keeps the 1990 code
generate kabcode_des93 = cond(_merge==3, kab93, kab90)
drop kab90 kab93 _merge
* attach the 1993 reference list
merge m:1 kabcode_des93 using "$clean/PODES/kab93"
* districts in East Timor are unmerged
* provhead==0 marks codes ending in 00; such using-only rows are discarded
generate provhead = mod(kabcode_des93,100)
drop if _merge==2 & provhead==0
tabulate kabcode_des93 if _merge==1
tabulate kabcode_des93 if _merge==2
/*
Table recorded by the original author at this point (presumably the output of
the tabulate above): kabcode_des93 values 1804, 3275, 5171, 5271, 7173, 8271,
frequency 1 each (16.67 percent each), total 6.
Original note: batam=kepri, lampung barat=lampung utara, kota tangerang=kab tangerang
	denpasar=badung, kota mataram=lombok barat, bitun=minahasa,
	kota jayapura=kab jayapura
Disabled code kept for reference:
recode wedloc (1472=1403) (1804=1803) (3275=3219) (5171=5103) (5271=5201) (7173=7103) (8271=8203)
*/
drop if _merge != 3
drop _merge

* aggregate to kabupaten level observations
collapse (sum) b7r1ak4 b7r1ak5, by(kabcode_des93)

* label, then give the counts their wave-specific names
label variable b7r1ak4 "Kindergarten schools: Government"
label variable b7r1ak5 "Kindergarten schools: Private"
rename (b7r1ak4 b7r1ak5) (kindergov90 kinderpvt90)

save "$clean/PODES/kab_tk90", replace

*--------*
*  1993  *
*--------*
* PODES 1993: combine the three village files, then count kindergartens per district.
use "$raw/PODES/1993/PODES93_1", clear
merge 1:1 b1r1 b1r2 b1r4 b1r5 b1r6 using "$raw/PODES/1993/PODES93_2"
drop _merge
merge 1:1 b1r1 b1r2 b1r4 b1r5 b1r6 using "$raw/PODES/1993/PODES93_3"
drop _merge

*** harmonize districts
rename (b1r1 b1r2) (prov kab)
destring prov, replace
* force: non-numeric kab values become missing and are dropped next
destring kab, replace force
drop if missing(kab)
generate kabcode_des93 = prov*100 + kab
merge m:1 kabcode_des93 using "$clean/PODES/kab93"
* districts in East Timor are unmerged
drop if _merge != 3
drop _merge

* aggregate to kabupaten level observations
collapse (sum) b5r1ak4 b5r1ak5, by(kabcode_des93)

* label, then give the counts their wave-specific names
label variable b5r1ak4 "Kindergarten schools: Government"
label variable b5r1ak5 "Kindergarten schools: Private"
rename (b5r1ak4 b5r1ak5) (kindergov93 kinderpvt93)

save "$clean/PODES/kab_tk93", replace

*--------*
*  1996  *
*--------*
* PODES 1996: combine the two village files, then count kindergartens per 1993 district.
use "$raw/PODES/1996/pod96a", clear
merge 1:1 b1r1 b1r2 b1r3 b1r4 b1r5 using "$raw/PODES/1996/pod96b"
drop _merge

*** harmonize districts
* original note: approximate, closest survey year is 1997
rename (b1r1 b1r2) (prov kab)
generate kabcode_des96 = prov*100 + kab
merge m:1 kabcode_des96 using "$clean/PODES/kab9396"
tabulate kabcode_des96 if _merge==1
* districts in East Timor are unmerged
drop if _merge != 3
drop _merge

* aggregate to kabupaten level observations
collapse (sum) b5r1ak2 b5r1ak3 b5r1ak4 b5r1ak5, by(kabcode_des93)

* government = ak2 + ak3, private = ak4 + ak5; the raw columns are then discarded
generate kindergov = b5r1ak2 + b5r1ak3
generate kinderpvt = b5r1ak4 + b5r1ak5
drop b5r1ak2 b5r1ak3 b5r1ak4 b5r1ak5

label variable kindergov "Kindergarten schools: Government"
label variable kinderpvt "Kindergarten schools: Private"
rename (kindergov kinderpvt) (kindergov96 kinderpvt96)

save "$clean/PODES/kab_tk96", replace

*--------*
*  2000  *
*--------*
* PODES 2000: villages present in both files, counted per 1993 district.
use "$raw/PODES/2000/pds20001", clear
merge 1:1 prop kab kec desa drh using "$raw/PODES/2000/pds20002"
drop if _merge != 3
drop _merge

*** harmonize districts to 1993
rename prop prov
rename kab kab
destring prov, replace
destring kab, replace
generate id_podes00 = prov*100 + kab
merge m:1 id_podes00 using "$clean/PODES/kab9300"
* list PODES districts with no crosswalk entry before dropping them
tabulate id_podes00 if _merge==1
drop if _merge != 3
drop _merge
rename bps_1993 kabcode_des93

* aggregate to kabupaten level observations
collapse (sum) b5r1a2 b5r1a3, by(kabcode_des93)

* label, then give the counts their wave-specific names
label variable b5r1a2 "Kindergarten schools: Government"
label variable b5r1a3 "Kindergarten schools: Private"
rename (b5r1a2 b5r1a3) (kindergov00 kinderpvt00)

save "$clean/PODES/kab_tk00", replace

*--------*
*  2003  *
*--------*
* PODES 2003: kindergarten counts per 1993 district.
use "$raw/PODES/2003/PODES03a.dta", clear

*** harmonize districts to 1993
destring prop, replace
destring kab, replace
generate kabcode_des02 = prop*100 + kab

*** manual recode for Kep Riau
recode kabcode_des02 (1410=2002) (1411=2001) (1412=2003) (1472=2071) (1474=2072)

* original note: most of the new codes come from des99, one from des01
merge m:1 kabcode_des02 using "$clean/PODES/kab9302"
* list unmatched codes on either side before dropping them
tabulate kabcode_des02 if _merge==1
tabulate kabcode_des02 if _merge==2
drop if _merge != 3
drop _merge

* aggregate to kabupaten level observations
collapse (sum) b6r601a2 b6r601a3, by(kabcode_des93)

* label, then give the counts their wave-specific names
label variable b6r601a2 "Kindergarten schools: Government"
label variable b6r601a3 "Kindergarten schools: Private"
rename (b6r601a2 b6r601a3) (kindergov03 kinderpvt03)

save "$clean/PODES/kab_tk03", replace

*--------*
*  2005  *
*--------*
* PODES 2005: kindergarten counts per 1993 district.
use "$raw/PODES/2005/PODES05a.dta", clear

*** harmonize districts to 1993
rename (r101a r102a) (prov kab)
* disabled in the original (no destring is applied here):
*destring prop, replace
*destring kab, replace
generate id_podes05 = prov*100 + kab
/*
Disabled manual recode kept for reference:
recode kabcode_des03 (7319=7602) (7320=7601) (7321=7604) (7323=7603) (7324=7605) ///
	(9405=9101) (9406=9107) (9407=9105) (9421=9102) (9422=9106) (9423=9108) (9424=9104) (9425=9103) (9472=9171)
*/
merge m:1 id_podes05 using "$clean/PODES/kab9305"
* list unmatched ids on either side before dropping them
tabulate id_podes05 if _merge==1
tabulate id_podes05 if _merge==2
drop if _merge != 3
drop _merge

* aggregate to kabupaten level observations
rename bps_1993 kabcode_des93
collapse (sum) r601ak2 r601ak3, by(kabcode_des93)

* label, then give the counts their wave-specific names
label variable r601ak2 "Kindergarten schools: Government"
label variable r601ak3 "Kindergarten schools: Private"
rename (r601ak2 r601ak3) (kindergov05 kinderpvt05)

save "$clean/PODES/kab_tk05", replace

*--------*
*  2008  *
*--------*
* PODES 2008: combine the three village files, then count kindergartens per 1993 district.
use "$raw/PODES/2008/pds2008_a", clear
merge 1:1 prop kab kec desa kla using "$raw/PODES/2008/pds2008_b"
drop _merge
merge 1:1 prop kab kec desa kla using "$raw/PODES/2008/pds2008_c"
drop _merge

*** harmonize districts
* original note: approximate, closest survey year is 2007
rename prop prov
rename kab kab
destring prov, replace
destring kab, replace
generate kabcode_des08 = prov*100 + kab
merge m:1 kabcode_des08 using "$clean/PODES/kab9308"
* list PODES districts with no crosswalk entry before dropping them
tabulate kabcode_des08 if _merge==1
drop if _merge != 3
drop _merge

* aggregate to kabupaten level observations
collapse (sum) r601a_2 r601a_3, by(kabcode_des93)

* label, then give the counts their wave-specific names
label variable r601a_2 "Kindergarten schools: Government"
label variable r601a_3 "Kindergarten schools: Private"
rename (r601a_2 r601a_3) (kindergov08 kinderpvt08)

save "$clean/PODES/kab_tk08", replace


*--------*
*  2011  *
*--------*
* PODES 2011: combine the three village files (keyed on iddesa), then count
* kindergartens per 1993 district.
use "$raw/PODES/2011/PODES_desa_2011_d1", clear
merge 1:1 iddesa using "$raw/PODES/2011/PODES_desa_2011_d2"
drop _merge
merge 1:1 iddesa using "$raw/PODES/2011/PODES_desa_2011_d3"
drop _merge

*** harmonize districts to 1993
* district codes are matched on the kabcode_agt09 column of the crosswalk
rename (kode_prov kode_kab) (prov kab)
generate kabcode_agt09 = prov*100 + kab
merge m:1 kabcode_agt09 using "$clean/PODES/kab9309"
* list PODES districts with no crosswalk entry before dropping them
tabulate kabcode_agt09 if _merge==1
drop if _merge != 3
drop _merge

* aggregate to kabupaten (1993 district) level observations
collapse (sum) r701ak2 r701ak3, by(kabcode_des93)

* label, then give the counts their wave-specific names
label variable r701ak2 "Kindergarten schools: Government"
label variable r701ak3 "Kindergarten schools: Private"
rename (r701ak2 r701ak3) (kindergov11 kinderpvt11)

save "$clean/PODES/kab_tk11", replace

********
* 2014 *
********
* PODES 2014: combine the three village files, then count kindergartens per
* 1993 district. Output variables: kabcode_des93 kindergov14 kinderpvt14.

use "$raw/PODES/2014/PODES_desa_2014_d1_new",clear
merge 1:1 r101 r101n r102 r102n r103 r103n r104 r104n using "$raw/PODES/2014/PODES_desa_2014_d2_new"
drop _merge
merge 1:1 r101 r101n r102 r102n r103 r103n r104 r104n using "$raw/PODES/2014/PODES_desa_2014_d3_new"
drop _merge

*** harmonize districts to 1993
* r101 and r102 are joined with + and the result is destring'd
* (presumably string province/district codes -- TODO confirm)
gen bps_2014 = r101+r102
destring bps_2014,replace
* merge
merge m:1 bps_2014 using "$clean/PODES/kab9314"
keep if _merge==3
drop _merge

* aggregate to kabupaten (1993 district) level observations
collapse (sum) r701a_k2 r701a_k3 ///
	, by(bps_1993)

* label variables
la var r701a_k2 "Kindergarten schools: Government"
la var r701a_k3 "Kindergarten schools: Private"

* rename variables
* The original suffixed every variable with 14 (turning the key into
* bps_199314) and then renamed "bps_1993", which only resolved through
* variable-name abbreviation. Name each variable explicitly instead so the
* step also works with -set varabbrev off-.
ren (bps_1993 r701a_k2 r701a_k3) (kabcode_des93 kindergov14 kinderpvt14)

save "$clean/PODES/kab_tk14",replace

***************************************************************************
*** PART 2: Obtain population density of 3-6 year olds in Podes years from susenas
***************************************************************************

*--------*
*  1990  *
*--------*

/* Note: Susenas 1990 doesn't have regular household survey, only consumption
*/

** Conclusion: use Susenas 1993 to identify the 3-6 in 1990 cohort
*	- caveat: child mortality

use "$raw/SUSENAS/IDN_1993_SUSENAS_v01_M_STATA8/Susenas93_ki", clear
* children aged 3-6 in 1990 are aged 6-9 in the 1993 survey
generate age36 = inrange(usia,6,9)
capture destring k1r1, replace
capture destring k1r2, replace
*gen kabcode_des93 = k1r1*100 + k1r2
generate kab90 = k1r1*100 + k1r2
destring k1r3, generate(b1r4)
* flag subdistricts that belong to a district split off by 1993
merge m:1 kab90 b1r4 using "$clean/PODES/kab9390"
* matched subdistricts take the split-district code; the rest keep kab90
generate kabcode_des93 = cond(_merge==3, kab93, kab90)
drop kab90 kab93 _merge
merge m:1 kabcode_des93 using "$clean/PODES/kab93"
* provhead==0 marks codes ending in 00; such using-only rows are discarded
generate provhead = mod(kabcode_des93,100)
drop if _merge==2 & provhead==0
tabulate kabcode_des93 if _merge==1
tabulate kabcode_des93 if _merge==2
* districts in East Timor are unmerged
drop if _merge != 3
drop _merge
* weighted count of flagged children per district
collapse (sum) age36 [fw=weind], by(kabcode_des93)
save "$clean/SUSENAS/child density 90", replace

*--------*
*  1993  *
*--------*
* Susenas 1993: weighted count of children aged 3-6 per 1993 district.
use "$raw/SUSENAS/IDN_1993_SUSENAS_v01_M_STATA8/Susenas93_ki", clear
generate age36 = inrange(usia,3,6)
capture destring k1r1, replace
capture destring k1r2, replace
*gen kabcode_des93 = k1r1*100 + k1r2
generate kab90 = k1r1*100 + k1r2
destring k1r3, generate(b1r4)
* flag subdistricts that belong to a district split off by 1993
merge m:1 kab90 b1r4 using "$clean/PODES/kab9390"
* matched subdistricts take the split-district code; the rest keep kab90
generate kabcode_des93 = cond(_merge==3, kab93, kab90)
drop kab90 kab93 _merge
merge m:1 kabcode_des93 using "$clean/PODES/kab93"
* provhead==0 marks codes ending in 00; such using-only rows are discarded
generate provhead = mod(kabcode_des93,100)
drop if _merge==2 & provhead==0
tabulate kabcode_des93 if _merge==1
tabulate kabcode_des93 if _merge==2
* districts in East Timor are unmerged
drop if _merge != 3
drop _merge
collapse (sum) age36 [fw=weind], by(kabcode_des93)
save "$clean/SUSENAS/child density 93", replace

*--------*
*  1996  *
*--------*
* Susenas 1996: weighted count of children aged 3-6 per 1993 district,
* plus pop96 = weighted count of non-missing usia.
use "$raw/SUSENAS/IDN_1996_SUSENAS_v01_M_STATA8/Susenas96_ki", clear
generate age36 = inrange(usia,3,6)
capture destring b1r1, replace
capture destring b1r2, replace
generate kabcode_des96 = b1r1*100 + b1r2
** harmonize districts to 1993
merge m:1 kabcode_des96 using "$clean/PODES/kab9396"
* list survey districts with no crosswalk entry before dropping them
tabulate kabcode_des96 if _merge==1
drop if _merge != 3
drop _merge
collapse (count) pop96=usia (sum) age36 [fw=weind], by(kabcode_des93)
save "$clean/SUSENAS/child density 96", replace

*--------*
*  2000  *
*--------*
* Susenas 2000: weighted count of children aged 3-6 per 1993 district.
use "$raw/SUSENAS/IDN_2000_SUSENAS_v01_M_STATA8/Susenas00_ki", clear
generate age36 = inrange(usia,3,6)
capture destring k1r1, replace
capture destring k1r2, replace
generate id_podes00 = k1r1*100 + k1r2
** harmonize districts to 1993
* disabled in the original, kept for reference:
*ren kabcode_des99 kabcode_des97
/*
recode kabcode_des97 (1110=1108) (1111=1108) (1212=1210) (1213=1211) ///
	(1473=1405) (1807=1803) (1808=1806) (1872=1802) (3277=3204) (3278=3206) ///
	(6107=6105) (6108=6106) (7322=7317)
*/
* the survey is matched on the PODES 2000 crosswalk key
merge m:1 id_podes00 using "$clean/PODES/kab9300"
tabulate id_podes00 if _merge==1
* districts in East Timor are unmerged
drop if _merge != 3
drop _merge
rename bps_1993 kabcode_des93
collapse (sum) age36 [fw=weind], by(kabcode_des93)
save "$clean/SUSENAS/child density 00", replace

********
* 2003 *
********
* Susenas 2003: weighted count of children aged 3-6 per 1993 district.

*** harmonize districts susenas 03 -> 1993
use "$raw/District codes/crosswalk_frame511_june14.dta",clear
keep bps_1993 id_sus03
duplicates drop
drop if id_sus03==.
duplicates report id_sus03
* Same manual recode that every other bps_1993-based crosswalk in this file
* applies (PODES 2000/2005/2014, SUSENAS 2005). Without it, codes 1212 and 1309
* would not line up with the kabcode_des93 values used on the PODES side.
recode bps_1993 (1212=1210) (1309=1308)
* NOTE(review): this folder is spelled "Susenas" while the other outputs go to
* "SUSENAS"; left unchanged so the saved file path stays the same.
save "$clean/Susenas/kab9303_sus",replace

*** work with susenas
use "$raw/SUSENAS/IDN_2003_SUSENAS-JUL_v01_M_STATA8/Susenas03jul_ki",clear
gen age36 = inrange(umur,3,6)
capt destring b1r1, replace
capt destring b1r2, replace
gen id_sus03 = b1r1*100 + b1r2
** harmonize districts to 1993
merge m:1 id_sus03 using "$clean/Susenas/kab9303_sus"
* As in the other waves: list unmatched survey districts, then keep matches
* only. Otherwise unmatched rows (missing bps_1993) are collapsed into a
* district with a missing code.
tab id_sus03 if _merge==1
keep if _merge==3
drop _merge
ren bps_1993 kabcode_des93
collapse (sum) age36 [fw=weind], by(kabcode_des93)
save "$clean/SUSENAS/child density 03",replace

********
* 2005 *
********
* Susenas 2005: weighted count of children aged 3-6 per 1993 district.

*** work with susenas
use "$raw/SUSENAS/IDN_2005_SUSENAS-JUL_v01_M_STATA8/Susenas05jul_ki",clear
gen age36 = inrange(usia,3,6)
capt destring b1r1, replace
capt destring b1r2, replace
gen id_sus05 = b1r1*100 + b1r2
** harmonize districts to 1993
* The crosswalk is saved in Part 0 as "$clean/SUSENAS/kab9305"; the original
* read it from "$clean/Susenas/...", which fails on case-sensitive file systems.
merge m:1 id_sus05 using "$clean/SUSENAS/kab9305"
* As in the other waves: list unmatched survey districts, then keep matches
* only. Otherwise unmatched rows (missing bps_1993) are collapsed into a
* district with a missing code.
tab id_sus05 if _merge==1
keep if _merge==3
drop _merge
ren bps_1993 kabcode_des93
collapse (sum) age36 [fw=weind], by(kabcode_des93)
save "$clean/SUSENAS/child density 05",replace

*--------*
*  2008  *
*--------*
* Susenas 2008: weighted count of children aged 3-6 per 1993 district.
use "$raw/SUSENAS/IDN_2008_SUSENAS-JUL_v01_M_STATA8/Susenas08jul_ki", clear
generate age36 = inrange(umur,3,6)
capture destring b1r1, replace
capture destring b1r2, replace
generate kabcode_des08 = b1r1*100 + b1r2
** harmonize districts to 1993
merge m:1 kabcode_des08 using "$clean/PODES/kab9308"
* list survey districts with no crosswalk entry before dropping them
tabulate kabcode_des08 if _merge==1
drop if _merge != 3
drop _merge
collapse (sum) age36 [fw=weind], by(kabcode_des93)
save "$clean/SUSENAS/child density 08", replace

*--------*
*  2011  *
*--------*
* Susenas 2011: weighted count of children aged 3-6 per 1993 district.
use "$raw/SUSENAS/IDN_2011_SUSENAS-JUN_STATA/Susenas11jun_ki", clear
generate age36 = inrange(umur,3,6)
capture destring b1r1, replace
capture destring b1r2, replace
generate kabcode_agt09 = b1r1*100 + b1r2
* frequency weights must be integers, so fwt is rounded
generate weind = round(fwt)
** harmonize districts to 1993
merge m:1 kabcode_agt09 using "$clean/PODES/kab9309"
* list survey districts with no crosswalk entry before dropping them
tabulate kabcode_agt09 if _merge==1
drop if _merge != 3
drop _merge
collapse (sum) age36 [fw=weind], by(kabcode_des93)
save "$clean/SUSENAS/child density 11", replace

*--------*
*  2014  *
*--------*
* Susenas 2014 (pooled file): weighted count of children aged 3-6 per 1993 district.
use "$raw/SUSENAS/2014 pooled data/Susenas14pool_ki", clear
generate age36 = inrange(umur,3,6)
capture destring b1r1, replace
capture destring b1r2, replace
* NOTE(review): the 2014 survey codes are matched on the kabcode_agt09 crosswalk,
* whereas PODES 2014 uses bps_2014 -- confirm that no 2014 district is lost here
* (unmatched codes are listed by the tabulate below).
generate kabcode_agt09 = b1r1*100 + b1r2
* frequency weights must be integers, so fwt is rounded
generate weind = round(fwt)
** harmonize districts to 1993
merge m:1 kabcode_agt09 using "$clean/PODES/kab9309"
tabulate kabcode_agt09 if _merge==1
drop if _merge != 3
drop _merge
collapse (sum) age36 [fw=weind], by(kabcode_des93)
save "$clean/SUSENAS/child density 14", replace

***************************************************************************
*** PART 3: Merge Podes preschool with Susenas child density
***************************************************************************
* For every wave, add the Susenas child count (age36 -> cdens<wave>) to the
* district-level kindergarten file and overwrite that file.

foreach wave in 90 93 96 00 03 05 08 11 14 {
	display "`wave'"
	use "$clean/PODES/kab_tk`wave'", clear
	* clear leftovers, if any are present
	capture drop _merge
	capture drop cdens`wave'
	merge 1:1 kabcode_des93 using "$clean/SUSENAS/child density `wave'"
	* list districts present in only one of the two sources (they are kept)
	tabulate kabcode_des93 if _merge != 3
	drop _merge
	rename age36 cdens`wave'
	* tk is only renamed/labelled when it exists
	capture rename tk tk`wave'
	label variable cdens`wave' "Total children age 3-6"
	capture label variable tk`wave' "Preschool enrollment age 3-6"
	save "$clean/PODES/kab_tk`wave'", replace
}
* some districts were surveyed in Podes but not in Susenas, and vice versa

***************************************************************************
***PART 4: Merge across Podes waves based on 1993 districts
***************************************************************************
* Builds one wide district-level file: kindergarten counts and child counts
* for every wave, densities per 1,000 children, 2003-2014 growth, and a
* high-growth flag.

use "$clean/PODES/kab_tk90",clear

foreach j in 93 96 00 03 05 08 11 14 {
	di "`j'"
	capt drop _merge
	merge 1:1 kabcode_des93 using "$clean/PODES/kab_tk`j'"
	* districts missing from either side are listed but kept
	tab kabcode_des93 if _merge!=3
	drop _merge
}

**** preschool density: kindergartens per 1,000 children (cdens)
foreach v in gov pvt {
foreach x in 90 93 96 00 03 05 08 11 14 {
	gen nkinder`v'`x' = kinder`v'`x'/cdens`x'*1000
}
}

**** growth since 2003 (relative change in density, 2003 -> 2014)
foreach v in gov pvt {
	gen gkinder`v' = (nkinder`v'14/nkinder`v'03)-1
}

**** density distribution
* The original wrapped this in a 15-iteration year loop whose index was never
* used, repeating the same summary and plot; one pass per type is equivalent.
foreach v in gov pvt {
	sum gkinder`v',d
	* vertical line at the median taken from the summarize above
	qui kdensity gkinder`v', xline(`r(p50)') name(`v',replace)
}
// Note: median public growth = 0.85, private = 0.92 --> high=1, doubling stock

**** define high growth areas
* Stata treats missing as larger than any number, so "gkinder>1" alone would
* flag districts with missing growth (missing 2003/2014 density, or a zero
* 2003 density) as high growth. Leave the flag missing for those instead.
foreach v in gov pvt {
	gen high`v' = gkinder`v'>1 if !missing(gkinder`v')
}

save "$clean/PODES/kindergarten across podes",replace


***************************************************************************
*** PART 5: Infer density between years
***************************************************************************
* Start from the wide file, keep only the density variables and the district
* key, and expand the two-digit wave suffixes to four-digit years.

use "$clean/PODES/kindergarten across podes", clear

keep nkindergov* nkinderpvt* kabcode_des93

* 19xx waves (the nkinder prefix keeps kabcode_des93 out of the pattern)
foreach yy in 90 93 96 {
	rename nkinder*`yy' nkinder*19`yy'
}
* 20xx waves
foreach yy in 00 03 05 08 11 14 {
	rename nkinder*`yy' nkinder*20`yy'
}

******************** fill in linear projection
* Each step draws a straight line through two observed waves and evaluates it
* at the years in between (steps 1 and 8 also evaluate it outside the pair).

*** 1) 1988-1993, missing for kab 8271, check for 1207 for negative values
foreach own in gov pvt {
	foreach yr of numlist 1988 1989 1991 1992 {
		generate nkinder`own'`yr' = ((nkinder`own'1993-nkinder`own'1990)/(1993-1990)*(`yr'-1990)) + nkinder`own'1990
	}
}
*** 2) 1993-1996, missing for kab 8271
foreach own in gov pvt {
	foreach yr of numlist 1994/1995 {
		generate nkinder`own'`yr' = ((nkinder`own'1996-nkinder`own'1993)/(1996-1993)*(`yr'-1993)) + nkinder`own'1993
	}
}
*** 3) 1996-2000, 30 kab missing
foreach own in gov pvt {
	foreach yr of numlist 1997/1999 {
		generate nkinder`own'`yr' = ((nkinder`own'2000-nkinder`own'1996)/(2000-1996)*(`yr'-1996)) + nkinder`own'1996
	}
}
*** 4) 2000-2003, 30 kab missing
foreach own in gov pvt {
	foreach yr of numlist 2001/2002 {
		generate nkinder`own'`yr' = ((nkinder`own'2003-nkinder`own'2000)/(2003-2000)*(`yr'-2000)) + nkinder`own'2000
	}
}
*** 5) 2003-2005, 13 kab missing (single in-between year)
foreach own in gov pvt {
	generate nkinder`own'2004 = ((nkinder`own'2005-nkinder`own'2003)/(2005-2003)*(2004-2003)) + nkinder`own'2003
}
*** 6) 2005-2008, 12 kab missing
foreach own in gov pvt {
	foreach yr of numlist 2006/2007 {
		generate nkinder`own'`yr' = ((nkinder`own'2008-nkinder`own'2005)/(2008-2005)*(`yr'-2005)) + nkinder`own'2005
	}
}
*** 7) 2008-2011, 6 kab missing
foreach own in gov pvt {
	foreach yr of numlist 2009/2010 {
		generate nkinder`own'`yr' = ((nkinder`own'2011-nkinder`own'2008)/(2011-2008)*(`yr'-2008)) + nkinder`own'2008
	}
}
*** 8) 2011-2015, 6 kab missing
foreach own in gov pvt {
	foreach yr of numlist 2012 2013 2015 {
		generate nkinder`own'`yr' = ((nkinder`own'2014-nkinder`own'2011)/(2014-2011)*(`yr'-2011)) + nkinder`own'2011
	}
}

* Put the density variables in chronological order (all government years,
* then all private years) right after the district key.
* The hand-written list repeated nkindergov2001 / nkinderpvt2001 and relied on
* the abbreviation "kab" for kabcode_des93; build the list programmatically
* and name the key in full.
local ordered
foreach v in gov pvt {
	forval x = 1988/2015 {
		local ordered `ordered' nkinder`v'`x'
	}
}
order `ordered', after(kabcode_des93)
misstable sum

* Steps 9-13 fill values that are still missing because one of the two waves
* used in steps 1-8 was missing, by interpolating between a wider pair of waves.

*** 9) correcting for missing 1990, 1993 in kab 8271 --> use data from 1996 and 2000
foreach v in gov pvt {
forval x = 1988/1995 {
	replace nkinder`v'`x' = ((nkinder`v'2000-nkinder`v'1996)/(2000-1996)*(`x'-1996)) + nkinder`v'1996 if nkinder`v'`x'==.
}
}
misstable sum

*** 10) correcting for missing 2000 in 30 kab --> use data from 1996 and 2003
foreach v in gov pvt {
forval x = 1997/2002 {
	replace nkinder`v'`x' = ((nkinder`v'2003-nkinder`v'1996)/(2003-1996)*(`x'-1996)) + nkinder`v'1996 if nkinder`v'`x'==.
}
}
misstable sum // kab 8203

*** 11) correcting for missing 2000, 2003 in 1 kab --> use data from 1996 and 2005
* unlike the other steps this overwrites 1997-2004 unconditionally for district 8203;
* the key is written in full (the original used the abbreviation "kab")
foreach v in gov pvt {
forval x = 1997/2004 {
	replace nkinder`v'`x' = ((nkinder`v'2005-nkinder`v'1996)/(2005-1996)*(`x'-1996)) + nkinder`v'1996 if kabcode_des93==8203
}
}

misstable sum
*** 12) correcting for missing 2005 in 12 kab --> use data from 2003 and 2008, check 1172 (fine!)
foreach v in gov pvt {
forval x = 2004/2007 {
	replace nkinder`v'`x' = ((nkinder`v'2008-nkinder`v'2003)/(2008-2003)*(`x'-2003)) + nkinder`v'2003 if nkinder`v'`x'==.
}
}
misstable sum

*** 13) correcting for missing 2011 in 6 kab --> use data from 2008 and 2014
foreach v in gov pvt {
foreach x in 2009 2010 2011 2012 2013 2015 {
	replace nkinder`v'`x' = ((nkinder`v'2014-nkinder`v'2008)/(2014-2008)*(`x'-2008)) + nkinder`v'2008 if nkinder`v'`x'==.
}
}
misstable sum

*** 14) replace all negative values with 0
* (linear projection can go below zero; missing values are left untouched)
foreach v in gov pvt {
forval x=1988/2015 {
	replace nkinder`v'`x' = 0 if nkinder`v'`x'<0
}
}

**** growth since 2003 (relative change in imputed density, 2003 -> 2014)
foreach v in gov pvt {
	gen gkinder`v' = (nkinder`v'2014/nkinder`v'2003)-1
}

**** high growth flag
* Stata treats missing as larger than any number, so "gkinder>1" alone would
* flag districts with missing growth (missing density, or a zero 2003
* density) as high growth. Leave the flag missing for those instead.
foreach v in gov pvt {
	gen hightk`v' = gkinder`v'>1 if !missing(gkinder`v')
}

*** reshape to one row per district-year
reshape long nkindergov nkinderpvt, i(kabcode_des93) j(year)
* key written in full (the original used the abbreviation "kabcode")
sort kabcode_des93 year

ren nkinder* tkdens*

la var tkdensgov "Public preschool density imputed"
la var tkdenspvt "Private preschool density imputed"

save "$clean/PODES/preschool density imputed",replace